1c07c3d38bc8aa873075799f9d4ba59075dd8f9a,src/edu/stanford/nlp/parser/dvparser/DVModel.java,DVModel,readWordVectors,#,501

Before Change


    // Counters initialised to zero here; they are not touched again within this
    // excerpt (presumably updated further down in the loop -- confirm).
    int chineseNumberCount = 0;
    int chinesePercentCount = 0;

    System.err.println("Reading in the word vector file: " + op.lexOptions.wordVectorFile);
    // Number of vector components found on the current line; recomputed for every line.
    int dimOfWords = 0;
    // Ensures the "deleting word vector dimensions" warning is printed at most once.
    boolean warned = false;
    // Each line of the file is split on whitespace into a word followed by its components.
    for (String line : IOUtils.readLines(op.lexOptions.wordVectorFile, "utf-8")) {
      // NOTE(review): a line with leading whitespace yields an empty first token
      // from split(), so the word would be "" -- confirm the file format rules this out.
      String[]  lineSplit = line.split("\\s+");
      String word = lineSplit[0];
      // When a word function is configured, the vector is stored under its output
      // rather than under the raw token read from the file.
      if (op.wordFunction != null) {
        word = op.wordFunction.apply(word);
      }
      dimOfWords = lineSplit.length - 1;
      // If numHid has not been set to a positive value, adopt the dimension of the
      // current line and report it on stderr.
      if (op.lexOptions.numHid <= 0) {
        op.lexOptions.numHid = dimOfWords;
        System.err.println("Dimensionality of numHid not set.  The length of the word vectors in the given file appears to be " + dimOfWords);
      }
      // the first entry is the word itself
      // the other entries will all be entries in the word vector
      // A vector longer than numHid is truncated to its first numHid components
      // (warning once); a vector shorter than numHid aborts with a RuntimeException.
      if (dimOfWords > op.lexOptions.numHid) {
        if (!warned) {
          warned = true;
          System.err.println("WARNING: Dimensionality of numHid parameter and word vectors do not match, deleting word vector dimensions to fit!");
        }
        dimOfWords = op.lexOptions.numHid;
      } else if (dimOfWords < op.lexOptions.numHid) {
        throw new RuntimeException("Word vectors file has dimension too small for requested numHid of " + op.lexOptions.numHid);
      }
      // Build a dimOfWords x 1 array: token i of the line becomes row i-1, column 0.
      double vec[][] = new double[dimOfWords][1];
      for (int i = 1; i <= dimOfWords; i++) {
        vec[i-1][0] = Double.parseDouble(lineSplit[i]);
      }
      SimpleMatrix vector = new SimpleMatrix(vec);
      // wordVectors is declared outside this excerpt; presumably a Map, in which case
      // a later line with the same (possibly transformed) word replaces the earlier entry.
      wordVectors.put(word, vector);

      // TODO: factor out all of these identical blobs
      if (op.trainOptions.unknownNumberVector &&

After Change


    // Counters initialised to zero here; they are not touched again within this
    // excerpt (presumably updated further down in the loop -- confirm).
    int chineseNumberCount = 0;
    int chinesePercentCount = 0;

    // File parsing is delegated to RNNUtils.readRawWordVectors, which receives the
    // word vector file path and the current numHid. The helper is not visible here;
    // presumably it handles the dimension checks/truncation -- TODO confirm.
    Map<String, SimpleMatrix> rawWordVectors = RNNUtils.readRawWordVectors(op.lexOptions.wordVectorFile, op.lexOptions.numHid);

    // Walk every raw word and look up its vector in the loaded map.
    for (String word : rawWordVectors.keySet()) {
      SimpleMatrix vector = rawWordVectors.get(word);

      // The vector is fetched with the raw key above; only afterwards is the local
      // loop variable replaced by the word function's output, when one is configured.
      if (op.wordFunction != null) {
        word = op.wordFunction.apply(word);
      }

      // If numHid has not been set to a positive value, take it from the element
      // count reported by the current vector.
      if (op.lexOptions.numHid <= 0) {
        op.lexOptions.numHid = vector.getNumElements();
      }

      // TODO: factor out all of these identical blobs